# Importing Neccessary Libraries
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
import seaborn as sns
from matplotlib import pyplot as plt
#!pip install pingouin
import pingouin as pg
# Reading the data into pandas dataframe
df = pd.read_excel('Daten.xlsx')
# Viewing the first 5 rows of the the dataset
df.head()
Variante | Wiederholung | Beigabemenge [Vol.-%] | Material | pH | g KCl / l Substrat | mg NO3-N / l Substrat | mg NH4-N / l Substrat | mg P2O5 / l Substrat | mg K2O / l Substrat | ... | Frischmasse [g] | Blütenanzahl | Wurzelbonitur [1-5] | Wurzelbonitur [1-5]i | Pflanzenhöhe [cm] | Pflanzenhöhe [cm]i | Pflanzenbreite [cm] | Pflanzenbreite [cm]i | Chlorophyllgehalt | Trockenmasseanteil [%] | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 0 | Torf | 5.8 | 0.26 | 3.704760 | 3.280000 | 14.270560 | 16.400 | ... | 95.300000 | 42 | 4.75 | 4.500000 | 13 | 22.166667 | 20.714286 | 46 | 38.566667 | 11.235955 |
1 | 1 | 2 | 0 | Torf | 5.7 | 0.33 | 3.659580 | 3.240000 | 16.684848 | 17.172 | ... | 86.150000 | 37 | 4.75 | 5.000000 | 11 | 23.000000 | 20.785714 | 44 | 37.866667 | 11.475410 |
2 | 1 | 3 | 0 | Torf | 5.7 | 0.27 | 3.834508 | 3.643685 | 15.016605 | 19.440 | ... | 86.933333 | 36 | 4.75 | 5.000000 | 13 | 28.166667 | 21.357143 | 41 | 39.633333 | 11.111111 |
3 | 1 | 4 | 0 | Torf | 5.7 | 0.31 | 3.659580 | 3.240000 | 12.106260 | 16.200 | ... | 89.683333 | 34 | 4.25 | 4.666667 | 14 | 26.833333 | 19.928571 | 43 | 37.166667 | 11.949686 |
4 | 2 | 1 | 30 | Holzfasern | 6.0 | 0.46 | 2.891520 | 5.016666 | 18.884092 | 32.256 | ... | 70.816667 | 40 | 5.00 | 5.000000 | 12 | 23.833333 | 19.357143 | 36 | 35.766667 | 11.206897 |
5 rows × 25 columns
# Checking the datatypes of each column
df.dtypes
Variante int64 Wiederholung int64 Beigabemenge [Vol.-%] int64 Material object pH float64 g KCl / l Substrat float64 mg NO3-N / l Substrat float64 mg NH4-N / l Substrat float64 mg P2O5 / l Substrat float64 mg K2O / l Substrat float64 Gesamt-N [% TS] float64 P [% TS] float64 K [% TS] float64 Ca [% TS] float64 Mg [% TS] float64 Frischmasse [g] float64 Blütenanzahl int64 Wurzelbonitur [1-5] float64 Wurzelbonitur [1-5]i float64 Pflanzenhöhe [cm] int64 Pflanzenhöhe [cm]i float64 Pflanzenbreite [cm] float64 Pflanzenbreite [cm]i int64 Chlorophyllgehalt float64 Trockenmasseanteil [%] float64 dtype: object
# Changing the datatype of columns as per requirements for analysis
df['pH'] = pd.to_numeric(df['pH'],errors = 'coerce')
df['P [% TS]'] = pd.to_numeric(df['P [% TS]'],errors = 'coerce')
df['Beigabemenge [Vol.-%]'] = df['Beigabemenge [Vol.-%]'].astype('string')
# Creating a label column to create boxplot
df['label'] = df['Beigabemenge [Vol.-%]'] + '% ' + df['Material']
# Sorting the dataframe by label
df = df.sort_values(by='label')
# Creating boxplot between varient and pH
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="pH",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs pH', fontsize =20)
plt.show()
# Creating boxplot between Variente and g KCl / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="g KCl / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs g KCl / l Substrat', fontsize =20)
plt.show()
# Creating boxplot between variante and mg NO3-N / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="mg NO3-N / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs mg NO3-N / l Substrat', fontsize =20)
plt.show()
# Creating boxplot between variente and mg NH4-N / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="mg NH4-N / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs mg NH4-N / l Substrat', fontsize =20)
plt.show()
# Creating boxplot for Variente vs mg P2O5 / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="mg P2O5 / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs mg P2O5 / l Substrat', fontsize =20)
plt.show()
# Creating boxplot for Variente vs mg K2O / l Substrat
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="mg K2O / l Substrat",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs mg K2O / l Substrat', fontsize =20)
plt.show()
# Creating boxplot for Variente vs P [% TS]
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="P [% TS]",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs P [% TS]', fontsize =20)
plt.show()
# Creating boxplot for Variente vs K [% TS]
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="K [% TS]",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs K [% TS]', fontsize =20)
plt.show()
# Creating boxplot for Variente vs Ca [% TS]
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="Ca [% TS]",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs Ca [% TS]', fontsize =20)
plt.show()
# Creating boxplot for Variente vs Mg [% TS]
sns.set(rc = {'figure.figsize':(30,15)})
ax = sns.boxplot(x="label",y="Mg [% TS]",data=df)
ax.tick_params(axis='x', rotation=45)
ax.set_title('Variente vs Mg [% TS]', fontsize =20)
plt.show()
# List of Dependant Variables to execute the loop
DV = ['pH', 'g KCl / l Substrat', 'mg NO3-N / l Substrat', 'mg NH4-N / l Substrat', 'mg P2O5 / l Substrat', 'mg K2O / l Substrat',
'Gesamt-N [% TS]', 'P [% TS]', 'K [% TS]', 'Ca [% TS]', 'Mg [% TS]', 'Frischmasse [g]', 'Blütenanzahl', 'Wurzelbonitur [1-5]', 'Wurzelbonitur [1-5]i',
'Pflanzenhöhe [cm]', 'Pflanzenhöhe [cm]i', 'Pflanzenbreite [cm]',
'Pflanzenbreite [cm]i', 'Chlorophyllgehalt', 'Trockenmasseanteil [%]']
# Doing Anova Analysis
list_results = []
for dv in DV:
# run anova and create dv variable to identify dependent variable
aov = pg.anova(data=df, dv=dv, between='Variante', detailed=True)\
.assign(dv=dv)
# append to list of results
list_results.append(aov)
# concat all results into a DataFrame
df_results = pd.concat(list_results, axis=0)
# Dropping Within Rows
df_results.drop(df_results.index[df_results['Source'] == 'Within'], inplace=True)
# Export to Excel
df_results.to_excel('Anova.xlsx')
df_results
Source | SS | DF | MS | F | p-unc | np2 | dv | |
---|---|---|---|---|---|---|---|---|
0 | Variante | 1.315000e+00 | 10 | 0.131500 | 19.074725 | 5.706652e-11 | 0.852512 | pH |
0 | Variante | 3.406064e+00 | 10 | 0.340606 | 17.898105 | 1.342357e-10 | 0.844326 | g KCl / l Substrat |
0 | Variante | 2.928979e+03 | 10 | 292.897886 | 2.055398 | 5.871264e-02 | 0.383799 | mg NO3-N / l Substrat |
0 | Variante | 8.194135e+03 | 10 | 819.413528 | 29.275912 | 1.413117e-13 | 0.898698 | mg NH4-N / l Substrat |
0 | Variante | 7.110291e+04 | 10 | 7110.291326 | 30.411283 | 8.144041e-14 | 0.902110 | mg P2O5 / l Substrat |
0 | Variante | 3.600441e+06 | 10 | 360044.083265 | 52.798553 | 2.140272e-17 | 0.941175 | mg K2O / l Substrat |
0 | Variante | 2.026364e+00 | 10 | 0.202636 | 4.487919 | 4.974350e-04 | 0.576267 | Gesamt-N [% TS] |
0 | Variante | 8.042137e-02 | 10 | 0.008042 | 5.475764 | 1.039140e-04 | 0.631156 | P [% TS] |
0 | Variante | 5.384432e+01 | 10 | 5.384432 | 111.753635 | 5.665210e-22 | 0.972163 | K [% TS] |
0 | Variante | 2.014378e-01 | 10 | 0.020144 | 4.997903 | 2.270779e-04 | 0.609656 | Ca [% TS] |
0 | Variante | 2.027775e-01 | 10 | 0.020278 | 71.291246 | 5.524975e-19 | 0.957042 | Mg [% TS] |
0 | Variante | 1.362700e+04 | 10 | 1362.699505 | 39.392323 | 1.799251e-15 | 0.922703 | Frischmasse [g] |
0 | Variante | 3.034045e+03 | 10 | 303.404545 | 5.349906 | 1.134698e-04 | 0.618493 | Blütenanzahl |
0 | Variante | 3.475000e+01 | 10 | 3.475000 | 20.850000 | 1.697146e-11 | 0.863354 | Wurzelbonitur [1-5] |
0 | Variante | 5.315025e+01 | 10 | 5.315025 | 31.492519 | 4.898803e-14 | 0.905152 | Wurzelbonitur [1-5]i |
0 | Variante | 8.163636e+01 | 10 | 8.163636 | 8.353488 | 1.420132e-06 | 0.716823 | Pflanzenhöhe [cm] |
0 | Variante | 2.370694e+02 | 10 | 23.706944 | 10.234887 | 1.463084e-07 | 0.756186 | Pflanzenhöhe [cm]i |
0 | Variante | 3.659474e+02 | 10 | 36.594736 | 31.640511 | 4.575051e-14 | 0.905554 | Pflanzenbreite [cm] |
0 | Variante | 1.442227e+03 | 10 | 144.222727 | 28.541829 | 2.038334e-13 | 0.896363 | Pflanzenbreite [cm]i |
0 | Variante | 4.438153e+01 | 10 | 4.438153 | 2.537912 | 2.153880e-02 | 0.434729 | Chlorophyllgehalt |
0 | Variante | 1.103886e+01 | 10 | 1.103886 | 0.906046 | 5.387308e-01 | 0.215415 | Trockenmasseanteil [%] |
# Doing Post-Hoc Analysis
list_results = []
for dv in DV:
# run anova and create dv variable to identify dependent variable
posthocs = pg.pairwise_ttests(data=df, dv=dv, between='Variante')\
.assign(dv=dv)
# append to list of results
list_results.append(posthocs)
# concat all results into a DataFrame
df_results2 = pd.concat(list_results, axis=0)
# Export to Excel
df_results2.to_excel('Post_Hoc.xlsx')
df_results2
C:\Users\shahz\anaconda3\lib\site-packages\pingouin\bayesian.py:145: IntegrationWarning: The integral is probably divergent, or slowly convergent. integr = quad(fun, 0, np.inf, args=(t, n, r, df))[0]
Contrast | A | B | Paired | Parametric | T | dof | alternative | p-unc | BF10 | hedges | dv | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Variante | 1 | 2 | False | True | -3.655631 | 6.0 | two-sided | 0.010635 | 4.984 | -2.247758 | pH |
1 | Variante | 1 | 3 | False | True | -2.777460 | 6.0 | two-sided | 0.032104 | 2.49 | -1.707792 | pH |
2 | Variante | 1 | 4 | False | True | -5.656854 | 6.0 | two-sided | 0.001311 | 21.217 | -3.478261 | pH |
3 | Variante | 1 | 5 | False | True | -7.549834 | 6.0 | two-sided | 0.000280 | 67.374 | -4.642208 | pH |
4 | Variante | 1 | 6 | False | True | -1.805788 | 6.0 | two-sided | 0.120979 | 1.171 | -1.110335 | pH |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
50 | Variante | 8 | 10 | False | True | -1.537128 | 6.0 | two-sided | 0.175175 | 0.967 | -0.945142 | Trockenmasseanteil [%] |
51 | Variante | 8 | 11 | False | True | -1.054266 | 6.0 | two-sided | 0.332353 | 0.716 | -0.648242 | Trockenmasseanteil [%] |
52 | Variante | 9 | 10 | False | True | -0.800652 | 6.0 | two-sided | 0.453861 | 0.631 | -0.492301 | Trockenmasseanteil [%] |
53 | Variante | 9 | 11 | False | True | -0.450991 | 6.0 | two-sided | 0.667836 | 0.556 | -0.277303 | Trockenmasseanteil [%] |
54 | Variante | 10 | 11 | False | True | 0.291481 | 6.0 | two-sided | 0.780498 | 0.537 | 0.179224 | Trockenmasseanteil [%] |
1155 rows × 12 columns